# @title Testing
# Deduplicated imports: numpy and torch were each imported twice in the
# original cell.
import numpy as np
import torch
import evaluate
from transformers import TrainingArguments, Trainer, EarlyStoppingCallback, AutoModelForImageClassification

# Use GPU when available; fall back to CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Metrics consumed by compute_metrics below.
# "roc_auc" is loaded in its multiclass configuration (scored one-vs-rest).
roc_auc_metric = evaluate.load("roc_auc", "multiclass")
accuracy_metric = evaluate.load("accuracy")
precision_metric = evaluate.load("precision")
f1_metric = evaluate.load("f1")
recall_metric = evaluate.load("recall")
def compute_metrics(eval_pred):
    """Compute evaluation metrics for a multiclass classifier.

    Args:
        eval_pred: ``(logits, labels)`` pair supplied by the HF ``Trainer``;
            both are numpy arrays, with ``logits`` of shape
            ``(batch, num_classes)``.

    Returns:
        dict with ``roc_auc`` (one-vs-rest), ``accuracy``, and
        macro-averaged ``precision``, ``f1``, and ``recall``.
    """
    logits, labels = eval_pred
    # ROC-AUC needs class probabilities, not raw logits.
    probs = torch.softmax(torch.tensor(logits, dtype=torch.float32), dim=1).numpy()
    labels = labels.astype(np.int32)
    # Hard predictions: argmax over logits (same result as argmax over probs).
    predictions = np.argmax(logits, axis=-1)
    results = {}
    results["roc_auc"] = roc_auc_metric.compute(
        references=labels,
        prediction_scores=probs,
        multi_class="ovr",
    )["roc_auc"]
    results["accuracy"] = accuracy_metric.compute(
        predictions=predictions,
        references=labels,
    )["accuracy"]
    # zero_division=0 matches sklearn's default fallback value but silences
    # the UndefinedMetricWarning raised when a class gets no predictions
    # (this warning appeared in the original run log).
    results["precision"] = precision_metric.compute(
        predictions=predictions,
        references=labels,
        average="macro",
        zero_division=0,
    )["precision"]
    results["f1"] = f1_metric.compute(
        predictions=predictions,
        references=labels,
        average="macro",
    )["f1"]
    results["recall"] = recall_metric.compute(
        predictions=predictions,
        references=labels,
        average="macro",
        zero_division=0,
    )["recall"]
    return results
# Reload the fine-tuned weights from the local checkpoint directory
# (safetensors format only).
checkpoint_path = "./checkpoint-13000"
model = AutoModelForImageClassification.from_pretrained(checkpoint_path, use_safetensors=True)
# Trainer configuration.
# NOTE(review): the run log reports that early stopping could not find
# "eval_loss" (the emitted metric keys were prefixed "eval_train_*"), so
# metric_for_best_model may need to match the actual eval-dataset key —
# confirm against the dataset object passed to Trainer.
training_args = TrainingArguments(
    # Evaluate, save, and log on the same 500-step cadence.
    eval_strategy="steps",
    save_strategy="steps",
    eval_steps=500,
    save_steps=500,
    logging_steps=500,
    # Optimisation hyper-parameters.
    learning_rate=1e-4,
    weight_decay=1e-4,
    num_train_epochs=10,
    # Track the lowest evaluation loss and restore that checkpoint at the end.
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    save_total_limit=3,
    report_to="tensorboard",
    # presumably kept for the custom collator's benefit — verify
    remove_unused_columns=False,
)
# NOTE(review): train_dataset and eval_dataset are the same object here, so
# the reported metrics are computed on training data — confirm this is
# intended for this "Testing" cell.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collator,
    train_dataset=dataset,
    eval_dataset=dataset,
    compute_metrics=compute_metrics,
    # Stop if the monitored metric fails to improve for 5 evaluations.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=5)],
)
# pandas was used below (pd.DataFrame) but never imported anywhere in the cell.
import pandas as pd

# Run evaluation and tabulate the returned metrics.
test_results = trainer.evaluate(dataset)
print(test_results)
# One row per metric for readable notebook display.
pd.DataFrame(list(test_results.items()), columns=["Metric", "Value"])
/Users/vidhya/Library/Python/3.11/lib/python/site-packages/torch/utils/data/dataloader.py:692: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.
warnings.warn(warn_msg)
/Users/vidhya/Library/Python/3.11/lib/python/site-packages/sklearn/metrics/_classification.py:1731: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled
{'eval_train_loss': 0.8428382873535156, 'eval_train_model_preparation_time': 0.0024, 'eval_train_roc_auc': 0.9598214285714286, 'eval_train_accuracy': 0.625, 'eval_train_precision': 0.6541666666666666, 'eval_train_f1': 0.5839285714285714, 'eval_train_recall': 0.625, 'eval_train_runtime': 6.4443, 'eval_train_samples_per_second': 2.483, 'eval_train_steps_per_second': 0.31}
|   | Metric                            | Value    |
|---|-----------------------------------|----------|
| 0 | eval_train_loss                   | 0.842838 |
| 1 | eval_train_model_preparation_time | 0.002400 |
| 2 | eval_train_roc_auc                | 0.959821 |
| 3 | eval_train_accuracy               | 0.625000 |
| 4 | eval_train_precision              | 0.654167 |
| 5 | eval_train_f1                     | 0.583929 |
| 6 | eval_train_recall                 | 0.625000 |
| 7 | eval_train_runtime                | 6.444300 |
| 8 | eval_train_samples_per_second     | 2.483000 |
| 9 | eval_train_steps_per_second       | 0.310000 |